library(ggplot2)
library(gcookbook)system("convert /home/fdrennan/Documents/r/graphics/images/meggan.jpg /home/fdrennan/Documents/r/graphics/images/meggan.ppm")
img = pixmap::read.pnm("/home/fdrennan/Documents/r/graphics/images/meggan.ppm")
red.map = matrix(img@red[img@size[1]:1,], img@size[1], img@size[2], byrow = TRUE)
image(red.map)My Girlfriend Hanging out in R
library(knitr)
library(rgl)
knit_hooks$set(webgl = hook_webgl)plot3d(mtcars$wt, mtcars$hp, mtcars$mpg, type = "s", size = 0.75, lit= FALSE, xlab = "Weight", ylab = "Horsepower", zlab = "Miles/Gallon", col = "black ")You must enable Javascript to view this page properly.
library(corrplot)
mcor = round(cor(mtcars), 2)
corrplot(mcor)corrplot(mcor, method="shade", shade.col=NA, tl.col="black", tl.srt=45)# May need to install first, with install.packages("igraph")
library(igraph)
# Specify edges for a directed graph
gd <- graph(c(1,2, 2,3, 2,4, 1,4, 5,5, 3,6))
plot(gd)# For an undirected graph
gu <- graph(c(1,2, 2,3, 2,4, 1,4, 5,5, 3,6), directed=FALSE)
# No labels
plot(gu, vertex.label=NA)set.seed(229)DT::datatable(madmen2)# Create a graph object from the data set
g <- graph.data.frame(madmen2, directed=TRUE)
par(mar=c(0,0,0,0))
plot(g, layout=layout.fruchterman.reingold, vertex.size=8, edge.arrow.size=0.5,
vertex.label=NA)g <- graph.data.frame(madmen, directed=FALSE)
par(mar=c(0,0,0,0)) # Remove unnecessary margins
plot(g, layout=layout.circle, vertex.size=8, vertex.label=NA)m <- madmen[1:nrow(madmen) %% 2 == 1, ]
g <- graph.data.frame(m, directed=FALSE)
# Print out the names of each vertex
V(g)$name## [1] "Betty Draper" "Don Draper" "Harry Crane"
## [4] "Joan Holloway" "Lane Pryce" "Peggy Olson"
## [7] "Pete Campbell" "Roger Sterling" "Sal Romano"
## [10] "Henry Francis" "Allison" "Candace"
## [13] "Faye Miller" "Megan Calvet" "Rachel Menken"
## [16] "Suzanne Farrell" "Hildy" "Franklin"
## [19] "Rebecca Pryce" "Abe Drexler" "Duck Phillips"
## [22] "Playtex bra model" "Ida Blankenship" "Mirabelle Ames"
## [25] "Vicky" "Kitty Romano"
plot(g, layout=layout.fruchterman.reingold, vertex.size = 4, # Smaller nodes
vertex.label = V(g)$name, # Set the labels
vertex.label.cex = 0.8, # Slightly smaller font
vertex.label.dist = 0.4, # Offset the labels
vertex.label.color = "black")# Given a model, predict zvar from xvar and yvar
# Defaults to range of x and y variables, and a 16x16 grid
predictgrid <- function(model, xvar, yvar, zvar, res = 16, type = NULL) {
# Find the range of the predictor variable. This works for lm and glm
# and some others, but may require customization for others.
xrange <- range(model$model[[xvar]])
yrange <- range(model$model[[yvar]])
newdata <- expand.grid(x = seq(xrange[1], xrange[2], length.out = res),
y = seq(yrange[1], yrange[2], length.out = res))
names(newdata) <- c(xvar, yvar)
newdata[[zvar]] <- predict(model, newdata = newdata, type = type)
newdata
}
# Convert long-style data frame with x, y, and z vars into a list
# with x and y as row/column values, and z as a matrix.
df2mat <- function(p, xvar = NULL, yvar = NULL, zvar = NULL) {
if (is.null(xvar)) xvar <- names(p)[1]
if (is.null(yvar)) yvar <- names(p)[2]
if (is.null(zvar)) zvar <- names(p)[3]
x <- unique(p[[xvar]])
y <- unique(p[[yvar]])
z <- matrix(p[[zvar]], nrow = length(y), ncol = length(x))
m <- list(x, y, z)
names(m) <- c(xvar, yvar, zvar)
m
}
# Function to interleave the elements of two vectors
interleave <- function(v1, v2) as.vector(rbind(v1,v2))
library(rgl)
# Make a copy of the data set
m <- mtcars
# Generate a linear model
mod <- lm(mpg ~ wt + disp + wt:disp, data = m)
# Get predicted values of mpg from wt and disp
m$pred_mpg <- predict(mod)
# Get predicted mpg from a grid of wt and disp
mpgrid_df <- predictgrid(mod, "wt", "disp", "mpg")
mpgrid_list <- df2mat(mpgrid_df)
# Make the plot with the data points
plot3d(m$wt, m$disp, m$mpg, type="s", size=0.5, lit=FALSE)
# Add the corresponding predicted points (smaller)
spheres3d(m$wt, m$disp, m$pred_mpg, alpha=0.4, type="s", size=0.5, lit=FALSE)
# Add line segments showing the error
segments3d(interleave(m$wt, m$wt),
interleave(m$disp, m$disp),
interleave(m$mpg, m$pred_mpg),
alpha=0.4, col="red")
# Add the mesh of predicted values
surface3d(mpgrid_list$wt,
mpgrid_list$disp,
mpgrid_list$mpg,
alpha=0.4, front="lines", back="lines")plot3d(mtcars$wt, mtcars$disp, mtcars$mpg,
xlab = "", ylab = "", zlab = "",
axes = FALSE,
size=.5, type="s", lit=FALSE)
# Add the corresponding predicted points (smaller)
spheres3d(m$wt, m$disp, m$pred_mpg, alpha=0.4, type="s", size=0.5, lit=FALSE)
# Add line segments showing the error
segments3d(interleave(m$wt, m$wt),
interleave(m$disp, m$disp),
interleave(m$mpg, m$pred_mpg),
alpha=0.4, col="red")
# Add the mesh of predicted values
surface3d(mpgrid_list$wt,
mpgrid_list$disp,
mpgrid_list$mpg,
alpha=0.4, front="lines", back="lines")
# Draw the box
rgl.bbox(color="grey50",
emission="grey50",
xlen=0, ylen=0, zlen=0)
# Set default color of future objects to black
rgl.material(color="black")
# Add axes to specific sides. Possible values are "x--", "x-+", "x+-", and "x++".
axes3d(edges=c("x--", "y+-", "z--"),
ntick=6,
# Attempt 6 tick marks on each side
cex=.75)
# Smaller font
# Add axis labels. 'line' specifies
mtext3d("Weight", edge="x--", line=2)
mtext3d("Displacement", edge="y+-",line=3)
mtext3d("MPG", edge="z--",line=3)islice <- subset(isabel, z == min(z))
head(islice[,1:3])## x y z
## 1 -83 41.70000 0.035
## 2 -83 41.55571 0.035
## 3 -83 41.41142 0.035
## 4 -83 41.26713 0.035
## 5 -83 41.12285 0.035
## 6 -83 40.97856 0.035
slice <- subset(isabel, z == min(z))
ggplot(islice, aes(x=x, y=y)) +
geom_segment(aes(xend = x + vx/50, yend = y + vy/50),
size = 0.25) # Make the line segments 0.25 mm thick# Take a slice where z is equal to the minimum value of z
islice <- subset(isabel, z == min(z))
# Keep 1 out of every 'by' values in vector x
every_n <- function(x, by = 2) {
x <- sort(x)
x[seq(1, length(x), by = by)]
}
# Keep 1 of every 4 values in x and y
keepx <- every_n(unique(isabel$x), by=4)
keepy <- every_n(unique(isabel$y), by=4)
# Keep only those rows where x value is in keepx and y value is in keepy
islicesub <- subset(islice, x %in% keepx & y %in% keepy)
# Need to load grid for arrow() function
library(grid)
# Make the plot with the subset, and use an arrowhead 0.1 cm long
ggplot(islicesub, aes(x=x, y=y)) +
geom_segment(aes(xend = x+vx/50, yend = y+vy/50),
arrow = arrow(length = unit(0.1, "cm")), size = 0.25)The existing ‘speed’ column includes the z component. We’ll calculate speedxy, the horizontal speed.
islicesub$speedxy <- sqrt(islicesub$vx^2 + islicesub$vy^2)
# Map speed to alpha
ggplot(islicesub, aes(x=x, y=y)) +
geom_segment(aes(xend = x+vx/50, yend = y+vy/50, alpha = speed),
arrow = arrow(length = unit(0.1,"cm")), size = 0.6)# Get USA map data
usa <- map_data("usa")
# Map speed to colour, and set go from "grey80" to "darkred"
ggplot(islicesub, aes(x=x, y=y)) +
geom_segment(aes(xend = x+vx/50, yend = y+vy/50, colour = speed),
arrow = arrow(length = unit(0.1,"cm")), size = 0.6) +
scale_colour_continuous(low="grey80", high="darkred") +
geom_path(aes(x=long, y=lat, group=group), data=usa) +
coord_cartesian(xlim = range(islicesub$x), ylim = range(islicesub$y))# Keep 1
keepx <-every_n(unique(isabel$x), by=5)
keepy <- every_n(unique(isabel$y), by=5)
keepz <-every_n(unique(isabel$z), by=2)
isub <- subset(isabel, x %in% keepx & y %in% keepy & z %in% keepz)
ggplot(isub, aes(x=x, y=y)) +
geom_segment(aes(xend = x+vx/50, yend = y+vy/50, colour = speed),
arrow = arrow(length = unit(0.1,"cm")), size = 0.5) +
scale_colour_continuous(low="grey80", high="darkred") +
facet_wrap( ~ z)library(maps) # For map data
# Get map data for USA
states_map <- map_data("state")
ggplot(states_map, aes(x=long, y=lat, group=group)) +
geom_polygon(fill="white", colour="black")# geom_path (no fill) and Mercator projection
ggplot(states_map, aes(x=long, y=lat, group=group)) +
geom_path() + coord_map("mercator")# Get map data for world
world_map <- map_data("world")
east_asia <- map_data("world",
region=c("Japan", "China", "North Korea", "South Korea"))
# Map region to fill color
ggplot(east_asia, aes(x=long, y=lat, group=group, fill=region)) +
geom_polygon(colour="black") +
scale_fill_brewer(palette="Set2")# Get New Zealand data from world map
nz1 <- map_data("world", region="New Zealand")
nz1 <- subset(nz1, long > 0 & lat > -48)
# Trim off islands
ggplot(nz1, aes(x=long, y=lat, group=group)) + geom_path()# Get New Zealand data from the nz map
nz2 <- map_data("nz")
ggplot(nz2, aes(x=long, y=lat, group=group)) + geom_path()library(gtrendsR)
returnTrend <- gtrends(c("Trump"), geo = "US")
stateInterest = returnTrend$interest_by_region
# crimes <- data.frame(state = tolower(stateInterest$location), USArrests)
ggplot(stateInterest, aes(map_id = tolower(location), fill=hits)) +
geom_map(map = states_map, colour="black") +
scale_fill_gradient2(low="#559999", mid="grey90", high="#BB650B",
midpoint=median(stateInterest$hits)) +
expand_limits(x = states_map$long, y = states_map$lat) +
coord_map("polyconic") +
ggtitle("Who is Googling about Trump?")To set the parameters for the plot look at this link here. My header for the notebook looks like this.
Parameters are useful when you want to re-render the same report with distinct values for various key inputs, for example: + Running a report specific to a department or geographic region. + Running a report that covers a specific period in time. + Running multiple versions of a report for distinct sets of core assumptions.
```
library(marmap)
data(list = params$data)
ggplot2::autoplot(get(params$data))
ggplot2ggplotHere we’ll use quantmod to get some stock data and then we’ll plot the change in price with the volume of the day.
library(quantmod)
GSPC = invisible(getSymbols("^GSPC", auto.assign = FALSE))
GSPCReturns = dailyReturn(GSPC, type = "log")
xts2df = function(xtsObject, date = "1900-01-01"){
date = paste0(date, "::")
dailyReturns = dailyReturn(xtsObject[date], type = "log")
xtsdf = as.data.frame(xtsObject[date])
theNames = colnames(xtsdf)
returns = as.data.frame(dailyReturns)[,1]
pos = returns > 0
xtsdf = cbind(as.Date(rownames(as.data.frame(xtsdf))),
xtsdf,
returns,
cumsum(returns),
pos)
colnames(xtsdf) = c("Date",
theNames,
"LOG.Returns",
"SUM.Returns",
"Pos")
xtsdf
}
gspc = xts2df(GSPC, date = "2008-01-01")ggplot(gspc, aes(x = LOG.Returns, y = GSPC.Volume)) +
geom_point() ggplot(gspc, aes(x = seq_along(LOG.Returns), y = SUM.Returns)) +
geom_line() +
xlab("Time") +
ylab("Return")ggplot(gspc, aes(x=LOG.Returns)) +
geom_histogram(binwidth = .001) +
xlab("Returns") +
ylab("Frequency")myfun = function(x) {
10*sin(x) + 2*x
}
ggplot(data.frame(x=c(-3*pi, 3*pi)), aes(x=x)) +
stat_function(fun=myfun, geom="line")recentgspc = xts2df(GSPC, date = "2017-01-01")
ggplot(recentgspc, aes(x=Date, y = LOG.Returns, fill = Pos)) +
geom_bar(stat="identity",
position="identity",
colour="black",
size = 0.5)library(data.table)
tickers = fread("http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ&render=download")
tickers = as.data.frame(tickers)
for(i in c(2,3,4,5,6)) {
tickers[,i] = as.numeric(tickers[,i])
}
for(i in c(7, 8)) {
tickers[, i] = as.factor(tickers[, i])
}tickersNoNA = subset(tickers, tickers$Sector != "n/a")
ggplot(tickersNoNA, aes(x=Sector, y=log(MarketCap))) +
geom_boxplot() +
xlab("Sector") +
ylab("Log of Market Cap") +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
labs(title = "Log of Market Cap for S&P 500 Sectors")library(gcookbook)
library(plyr)
uspopage_prop = ddply(uspopage, "Year",
transform,
Percent = Thousands / sum(Thousands) * 100)
ggplot(uspopage_prop, aes(x=Year, y=Percent, fill=AgeGroup)) +
geom_area(colour="black", size=.2, alpha=.4) +
scale_fill_brewer(palette="Blues", breaks=rev(levels(uspopage$AgeGroup)))library(quantmod)
AAPL = getSymbols("AAPL",auto.assign = FALSE)
aapl=xts2df(AAPL, date = "2008-01-01")
capmData = data.frame(aapl = aapl$LOG.Returns, gspc = gspc$LOG.Returns)
ggplot(capmData, aes(x = gspc, y = aapl)) +
stat_smooth(method = lm) +
geom_point(size = .005) +
xlim(-.05, .05)+
ylim(-.05, .05) ggplot(tickersNoNA, aes(x=Sector, y=log(MarketCap))) +
geom_violin(scale = "count", adjust = .5) +
xlab("Sector") +
ylab("Log of Market Cap") +
theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
labs(title = "Log of Market Cap for S&P 500 Sectors")ggplot(heightweight, aes(x=sex, y=heightIn)) +
geom_boxplot(aes(x=as.numeric(sex) + .2, group=sex), width=.25) +
geom_dotplot(aes(x=as.numeric(sex) - .2, group=sex), binaxis="y",
binwidth=.5, stackdir="center") +
scale_x_continuous(breaks=1:nlevels(heightweight$sex),
labels=levels(heightweight$sex))p = ggplot(faithful, aes(x=eruptions, y=waiting)) +
geom_point() +
stat_density2d()
# Contour lines, with "height" mapped to color
p + stat_density2d(aes(colour=..level..))# Map density estimate to fill color
p + stat_density2d(aes(fill=..density..), geom="raster", contour=FALSE)# With points, and map density estimate to alpha
p + geom_point() +
stat_density2d(aes(alpha=..density..), geom="tile", contour=FALSE)p + stat_density2d(aes(fill=..density..), geom="raster",
contour=FALSE, h=c(.5,5))p = ggplot(faithful, aes(x=eruptions, y=waiting)) +
geom_point()
p + annotate("text", x=3, y=48, label="Group 1") +
annotate("text", x=4.5, y=66, label="Group 2")p + annotate("text", x=3, y=48, label="Group 1", family="serif",
fontface="italic", colour="darkred", size=3) +
annotate("text", x=4.5, y=66, label="Group 2", family="serif",
fontface="italic", colour="darkred", size=3)p + annotate("text", x=3, y=48, label="Group 1", alpha=.1) +
geom_text(x=4.5, y=66, label="Group 2", alpha=.1)p + annotate("text", x=-Inf, y=Inf, label="Upper left", hjust=-.2, vjust=2) +
annotate("text", x=mean(range(faithful$eruptions)), y=-Inf, vjust=-0.4,
label="Bottom middle")# A normal curve
p <- ggplot(data.frame(x=c(-3,3)), aes(x=x)) + stat_function(fun = dnorm)
p + annotate("text", x=2, y=0.3, parse=TRUE,
label="frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")p + annotate("text", x=0, y=0.05, parse=TRUE, size=4,
label="'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")p <- ggplot(heightweight, aes(x=ageYear, y=heightIn, colour=sex)) + geom_point()
# Add horizontal and vertical lines
p + geom_hline(yintercept=60) + geom_vline(xintercept=14)# Add angled line
p + geom_abline(intercept=37.4, slope=1.75)pd <- position_dodge(.3)
# Save the dodge spec because we use it repeatedly
ggplot(cabbage_exp, aes(x=Date, y=Weight, colour=Cultivar, group=Cultivar)) +
geom_errorbar(aes(ymin=Weight-se, ymax=Weight+se),
width=.2, size=0.25, colour="black", position=pd) +
geom_line(position=pd) +
geom_point(position=pd, size=2.5)# The base plot
p <- ggplot(mpg, aes(x=displ, y=hwy)) + geom_point() + facet_grid(. ~ drv)
# A data frame with labels for each facet
f_labels <- data.frame(drv = c("4", "f", "r"), label = c("4wd", "Front", "Rear"))
p + geom_text(x=6, y=40, aes(label=label), data=f_labels)# If you use annotate(), the label will appear in all facets
p + annotate("text", x=6, y=42, label="label text")# This function returns a data frame with strings representing the regression
# equation, and the r^2 value
# These strings will be treated as R math expressions
lm_labels <- function(dat) {
mod <- lm(hwy ~ displ, data=dat)
formula <- sprintf("italic(y) == %.2f %+.2f * italic(x)",
round(coef(mod)[1], 2), round(coef(mod)[2], 2))
r <- cor(dat$displ, dat$hwy)
r2 <- sprintf("italic(R^2) == %.2f", r^2)
data.frame(formula=formula, r2=r2, stringsAsFactors=FALSE)
}
library(plyr) # For the ddply() function
labels <- ddply(mpg, "drv", lm_labels)
# Plot with formula and R^2 values
p + geom_smooth(method=lm, se=FALSE) +
geom_text(x=3, y=40, aes(label=formula), data=labels, parse=TRUE, hjust=0) +
geom_text(x=3, y=35, aes(label=r2), data=labels, parse=TRUE, hjust=0)ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot()ggplot(PlantGrowth, aes(x=group, y=weight)) + geom_boxplot() + coord_flip()hwp <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) +
geom_point()
hwphwp + scale_y_continuous(breaks=c(50, 56, 60, 66, 72),
labels=c("Tiny", "Really\nshort", "Short",
"Medium", "Tallish"))countries = gcookbook::countries
sp <- ggplot(subset(countries, Year==2009 & healthexp>2000),
aes(x=healthexp, y=infmortality)) +
geom_point() +
annotate("text", x=4350, y=5.4, label="Canada") +
annotate("text", x=7400, y=6.8, label="USA")
ggplot(subset(countries, Year==2009 & healthexp>2000),
aes(x=healthexp, y=infmortality)) +
geom_point() +
geom_text(aes(label=Name), size=3, vjust=3)Stock chart with a linear x-axis and log y-axis; bottom: with manual breaks
library(gcookbook)
aapl = gcookbook::aapl
ggplot(aapl, aes(x=date,y=adj_price)) + geom_line()ggplot(aapl, aes(x=date,y=adj_price)) + geom_line() +
scale_y_log10(breaks=c(2,10,50,250))ggplot(economics, aes(x=date, y=psavert)) + geom_line()# Take a subset of economics
econ <- subset(economics, date >= as.Date("1992-05-01") & date < as.Date("1993-06-01"))
# Base plot - without specifying breaks
p <- ggplot(econ, aes(x=date, y=psavert)) + geom_line()
p# Specify breaks as a Date vector
datebreaks <- seq(as.Date("1992-06-01"), as.Date("1993-06-01"), by="2 month")
# Use breaks, and rotate text labels
p + scale_x_date(breaks=datebreaks) +
theme(axis.text.x = element_text(angle=30, hjust=1))plibrary(scales)
p + scale_x_date(breaks=datebreaks, labels=date_format("%Y %b")) +
theme(axis.text.x = element_text(angle=30, hjust=1))# Convert WWWusage time-series object to data frame
www <- data.frame(minute = as.numeric(time(WWWusage)),
users = as.numeric(WWWusage))
# Define a formatter function - converts time in minutes to a string
timeHM_formatter <- function(x) {
h <- floor(x/60)
m <- floor(x %% 60)
lab <- sprintf("%d:%02d", h, m) # Format the strings as HH:MM
return(lab)
}
# Default x axis
ggplot(www, aes(x=minute, y=users)) + geom_line()# With formatted times
ggplot(www, aes(x=minute, y=users)) + geom_line() +
scale_x_continuous(name="time", breaks=seq(0, 100, by=10),
labels=timeHM_formatter)# Base plot
p <- ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) + geom_area()
# These three have the same effect
pp + scale_fill_discrete()p + scale_fill_hue()# ColorBrewer palette
p + scale_fill_brewer()# Basic scatter plot
h <- ggplot(heightweight, aes(x=ageYear, y=heightIn, colour=sex)) +
geom_point()
# Default lightness = 65
h# Slightly darker
h + scale_colour_hue(l=45)# Base plot
p <- ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) + geom_area()
# The palette with grey:
cb_palette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
"#0072B2", "#D55E00", "#CC79A7")
# Add it to the plot
p + scale_fill_manual(values=cb_palette)cb <- subset(climate, Source=="Berkeley")
cb$valence[cb$Anomaly10y >= 0] <- "pos"
cb$valence[cb$Anomaly10y < 0] <- "neg"
ggplot(cb, aes(x=Year, y=Anomaly10y)) +
geom_area(aes(fill=valence)) +
geom_line() +
geom_hline(yintercept=0)